# Purpose: Produce the n-tile origin->destination movers plots

source("constants.R")

############################
## 1. Setup preliminaries ##
############################

### Input 1: regional coefficients
# Only the rows for the "all" sample and the "n_frnd_nat_lcl" measure are
# kept, restricted to the columns used further down in this script.
# See the README for instructions on how to access this data
regional_measures <- select(
  filter(
    read_csv("output/regional_measures.csv"),
    sample == "all" & measure == "n_frnd_nat_lcl"
  ),
  nuts3, sy_avg_resid, sy_n, native_n
)

### Input 2: data for residualization on the intensive margin ###
# Joined to the user-level data on `nuts3` below.
# NOTE(review): the file name is spelled "intesnive"; kept as-is -- confirm it
# matches the file on disk.
# See the README for instructions on how to access this data
residualize_intensive <- read_csv("input/movers_plots_ntiles_resid_intesnive.csv")

### Input 3: data for residualization on the extensive margin ###
# Joined on `curr_nuts3` below; supplies `n_native_fb`.
# See the README for instructions on how to access these data
native_pop_fb <- read_csv("input/movers_plots_ntiles_resid_extensive.csv")

# Kreis-level demographics and the crosswalk used to attach NUTS3 codes
# (joined on `ags` = `ID` below).
# See the README for instructions on how to access these data
kreis_demos <- read_csv("input/kreis_total_pop_by_age_gender.csv")
kreis_xw <- read_csv("input/nuts3_DE_xw.csv")

# 2019 snapshot of the Kreis-level demographics; everything in this block is
# built from it.
kreis_demos_2019 <- filter(kreis_demos, year == 2019)

# Kreise (identified by `ags`) whose 2019 `frac_syr_tot` is missing. Their
# share of natives on FB is replaced by the overall mean below.
kreis_to_exclude <- unique(
  kreis_demos_2019$ags[is.na(kreis_demos_2019$frac_syr_tot)]
)

# Share of natives on FB per region: FB native count (`n_native_fb`) over the
# native population, where native population = total * (1 - foreign share).
residualize_extensive <- kreis_demos_2019 %>%
  left_join(kreis_xw, by = c("ags" = "ID")) %>%
  mutate(pop_native = pop_tot * (1 - frac_for_tot)) %>%
  select(nuts3 = NUTS3, ags, pop_tot, pop_native) %>%
  left_join(native_pop_fb, by = c("nuts3" = "curr_nuts3")) %>%
  mutate(share_natives_on_fb = n_native_fb / pop_native)

# Impute the bad data with the overall mean of the remaining Kreise.
# na.rm = TRUE: both joins above are left joins, so a Kreis without a match
# has a missing share; without na.rm a single such row would turn the imputed
# value itself into NA.
is_excluded <- residualize_extensive$ags %in% kreis_to_exclude
impute_val <- mean(
  residualize_extensive$share_natives_on_fb[!is_excluded],
  na.rm = TRUE
)
residualize_extensive <- residualize_extensive %>%
  mutate(
    share_natives_on_fb = if_else(
      ags %in% kreis_to_exclude, impute_val, share_natives_on_fb
    )
  ) %>%
  select(nuts3, share_natives_on_fb)


#############################################
## 2. Get the data set to use at user level #
#############################################

# Each friendship-count column names one input file
# (input/moves_ntiles_<column>.csv). The value is the suffix appended to the
# corresponding output file names.
file_init_suffixes <- c(
    "n_friendships_to_curr_country_natives_by_nuts3" = "",
    "n_friendships_to_curr_country_natives_sy_initiated_by_nuts3" = "_sy_init",
    "n_friendships_to_curr_country_natives_nat_initiated_by_nuts3" = "_nat_init"
)
friend_cols <- names(file_init_suffixes)

###### Create the ranks #####
# The regional terciles do not depend on the friendship column, so they are
# computed once, outside the loop.
curr_ntiles <- 3

ntiles_dat <- regional_measures %>%
    mutate(nuts3_ntile = ntiles.wtd(sy_avg_resid, curr_ntiles, sy_n)) %>%
    select(nuts3, nuts3_ntile, sy_avg_resid)

# Build the movers plot for all paths that start in origin n-tile
# `from_ntile`.
#   plot_dat:   one row per (quarter, origin n-tile, destination n-tile), as
#               built inside the loop below
#   from_ntile: origin n-tile to keep
#   y_limits:   c(min, max) for the y axis, shared across plots
# Returns the ggplot object.
# NOTE(review): the error bars use `se` (computed from the raw outcome) while
# the plotted series is the residualized mean; `se_resid` is computed but not
# used here -- confirm this is intended.
# `c25` is not defined in this file; presumably a colour palette provided by
# constants.R.
plot_sy_movers_from <- function(plot_dat, from_ntile, y_limits) {
    plot_dat %>%
        filter(loc1_ntile == from_ntile) %>%
        ggplot(aes(x=quarters_since_move, y=makes_lcl_in_quarter_resid, col=path, shape=path)) +
            geom_line() +
            geom_point(size=3.5) +
            geom_errorbar(aes(ymin=makes_lcl_in_quarter_resid-(se*1.96), ymax=makes_lcl_in_quarter_resid+(se*1.96)), width=.3, size=.3,
                            position=position_dodge(0.05)) +
            theme_classic() +
            scale_color_manual(values=c25[c(1,2,6)]) +
            geom_vline(xintercept=-0.5, col="gray") +
            labs(x = "Quarters Since Move", y="Probability of Making a Local Native Friend (%)", col="", shape="") +
            scale_x_continuous(breaks=-4:5) +
            lims(y=y_limits) +
            theme(legend.position="bottom")
}

for (friend_col in friend_cols) {
    # `[[` errors on a column without a mapped suffix instead of silently
    # reusing the suffix of the previous iteration.
    file_init_suffix <- file_init_suffixes[[friend_col]]

    # Read in the data
    # See the README for instructions on how to access these data
    user_dat_in <- read_csv(str_interp("input/moves_ntiles_${friend_col}.csv"))

    # Attach the regional controls of the location the user lives in during
    # each quarter: origin (loc1) before the move, destination (loc2) after.
    dat <- user_dat_in %>%
        mutate(curr_loc = if_else(quarters_since_move < 0, loc1, loc2)) %>%
        left_join(residualize_intensive, by=c("curr_loc"="nuts3")) %>%
        left_join(residualize_extensive, by=c("curr_loc"="nuts3"))

    # Residualized outcome: regression residual re-centred on the overall
    # mean.
    # NOTE(review): lm() drops rows with missing values by default, so this
    # assignment relies on the outcome and both controls being complete after
    # the left joins; otherwise the residual vector is shorter than `dat`.
    resid_fit <- lm(
        makes_lcl_in_quarter ~ indiv_l1080_cntrl + share_natives_on_fb,
        data=dat
    )
    dat$makes_lcl_in_quarter_resid <- mean(dat$makes_lcl_in_quarter) + resid(resid_fit)


    #############################
    ## 3. Make the final plots ##
    #############################

    # Combine with the regional ranks of both origin and destination. The
    # inner joins drop moves whose origin or destination has no rank.
    plot_dat0 <- dat %>%
        inner_join(ntiles_dat, by=c("loc1"="nuts3")) %>%
        rename(
            loc1_ntile = nuts3_ntile,
            loc1_coeff = sy_avg_resid) %>%
        inner_join(ntiles_dat, by=c("loc2"="nuts3")) %>%
        rename(
            loc2_ntile = nuts3_ntile,
            loc2_coeff = sy_avg_resid)

    #### V1: Syrians to Natives
    # Order matters inside summarise(): each variance is taken before the
    # column of the same name is overwritten by its group mean.
    plot_dat <- plot_dat0 %>%
        group_by(quarters_since_move, loc1_ntile, loc2_ntile) %>%
        summarise(
            var = var(makes_lcl_in_quarter),
            makes_lcl_in_quarter = mean(makes_lcl_in_quarter),
            var_resid = var(makes_lcl_in_quarter_resid),
            makes_lcl_in_quarter_resid = mean(makes_lcl_in_quarter_resid),
            N = n()) %>%
        ungroup() %>%
        mutate(
            path = paste(loc1_ntile, loc2_ntile, sep= " to "),
            se = sqrt(var)/sqrt(N),
            se_resid = sqrt(var_resid)/sqrt(N))

    write_csv(plot_dat, str_interp("output/sy_movers_terciles${file_init_suffix}.csv"))

    # Sample sizes, taken from the raw input (before the rank joins).
    # NOTE(review): `n_movers` counts rows at quarter 0 and `n_moves` counts
    # distinct `rid` values -- confirm the two labels are not swapped.
    tibble(
        n_movers = nrow(filter(user_dat_in, quarters_since_move == 0)),
        n_moves = length(unique(user_dat_in$rid))) %>%
        write_csv(str_interp("output/sy_movers_terciles_Ns${file_init_suffix}.csv"))

    ### Plot min and max constant throughout
    y_min <- min(plot_dat$makes_lcl_in_quarter_resid - (1.96*plot_dat$se))
    y_max <- max(plot_dat$makes_lcl_in_quarter_resid + (1.96*plot_dat$se))

    # This produces Figure 3

    ##### Bottom to all, then top to all #####
    # The plot object is handed to ggsave() explicitly rather than relying on
    # last_plot().
    for (from_ntile in c(1, curr_ntiles)) {
        p <- plot_sy_movers_from(plot_dat, from_ntile, c(y_min, y_max))
        ggsave(
            str_interp("output/sy_movers_terciles_${from_ntile}_to_all${file_init_suffix}.png"),
            p, width=5, height=5)
    }
}


############################################
## 4. Make the native plots for revisions ##
############################################

# Read the natives-to-Syrians moves data.
# The path as originally written has no ".csv" extension, unlike every other
# input of this script. It is still tried first; "<path>.csv" is used only
# when the original path does not exist and the ".csv" variant does.
natives_to_sy_path <- "input/moves_ntiles_natives_to_sy_n_friendships_to_sy_migrants_by_nuts3"
if (!file.exists(natives_to_sy_path) &&
    file.exists(paste0(natives_to_sy_path, ".csv"))) {
    natives_to_sy_path <- paste0(natives_to_sy_path, ".csv")
}
user_dat_in_natives_to_sy <- read_csv(natives_to_sy_path)

# Attach the regional controls of the location the user lives in during each
# quarter: origin (loc1) before the move, destination (loc2) after.
dat_natives <- user_dat_in_natives_to_sy %>%
    mutate(curr_loc = if_else(quarters_since_move < 0, loc1, loc2)) %>%
    left_join(residualize_intensive, by=c("curr_loc"="nuts3")) %>%
    left_join(residualize_extensive, by=c("curr_loc"="nuts3"))

# Residualized outcome: regression residual re-centred on the overall mean.
# NOTE(review): lm() drops rows with missing values by default, so this
# assignment relies on the outcome and both controls being complete after the
# left joins; otherwise the residual vector is shorter than `dat_natives`.
resid_fit_natives <- lm(
    makes_lcl_in_quarter ~ indiv_l1080_cntrl + share_natives_on_fb,
    data=dat_natives
)
dat_natives$makes_lcl_in_quarter_resid <-
    mean(dat_natives$makes_lcl_in_quarter) + resid(resid_fit_natives)

# Regional ranks for the native plots: quartiles of `sy_avg_resid`, weighted
# by `native_n`. The same constant selects the top origin group below.
native_ntiles <- 4

ntiles_dat <- regional_measures %>%
    mutate(nuts3_ntile = ntiles.wtd(sy_avg_resid, native_ntiles, native_n)) %>%
    select(nuts3, nuts3_ntile, sy_avg_resid)

# Combine with the regional ranks of both origin and destination. The inner
# joins drop moves whose origin or destination has no rank.
plot_dat_native0 <- dat_natives %>%
    inner_join(ntiles_dat, by=c("loc1"="nuts3")) %>%
    rename(
        loc1_ntile = nuts3_ntile,
        loc1_coeff = sy_avg_resid) %>%
    inner_join(ntiles_dat, by=c("loc2"="nuts3")) %>%
    rename(
        loc2_ntile = nuts3_ntile,
        loc2_coeff = sy_avg_resid)

# Order matters inside summarise(): each variance is taken before the column
# of the same name is overwritten by its group mean.
plot_dat_native <- plot_dat_native0 %>%
    group_by(quarters_since_move, loc1_ntile, loc2_ntile) %>%
    summarise(
        var = var(makes_lcl_in_quarter),
        makes_lcl_in_quarter = mean(makes_lcl_in_quarter),
        var_resid = var(makes_lcl_in_quarter_resid),
        makes_lcl_in_quarter_resid = mean(makes_lcl_in_quarter_resid),
        N = n()) %>%
    ungroup() %>%
    mutate(
        path = paste(loc1_ntile, loc2_ntile, sep= " to "),
        se = sqrt(var)/sqrt(N),
        se_resid = sqrt(var_resid)/sqrt(N))

### Plot min and max constant throughout
y_min <- min(plot_dat_native$makes_lcl_in_quarter_resid - (1.96*plot_dat_native$se))
y_max <- max(plot_dat_native$makes_lcl_in_quarter_resid + (1.96*plot_dat_native$se))

# This produces Figure 5

##### Bottom to all, then top to all #####
# NOTE(review): the error bars use `se` (computed from the raw outcome) while
# the plotted series is the residualized mean; `se_resid` is computed but not
# used here -- confirm this is intended.
# `c25` is not defined in this file; presumably a colour palette provided by
# constants.R.
for (from_ntile in c(1, native_ntiles)) {
    p_native <- plot_dat_native %>%
        filter(loc1_ntile == from_ntile) %>%
        ggplot(aes(x=quarters_since_move, y=makes_lcl_in_quarter_resid, col=path, shape=path)) +
            geom_line() +
            geom_point(size=3.5) +
            geom_errorbar(aes(ymin=makes_lcl_in_quarter_resid-(se*1.96), ymax=makes_lcl_in_quarter_resid+(se*1.96)), width=.3, size=.3,
                            position=position_dodge(0.05)) +
            theme_classic() +
            scale_color_manual(values=c25[c(1,2,6,3,4)]) +
            geom_vline(xintercept=-0.5, col="gray") +
            labs(x = "Quarters Since Move", y="Probability of Making a Local Syrian Friend (%)", col="", shape="") +
            scale_x_continuous(breaks=-4:5) +
            lims(y=c(y_min, y_max)) +
            theme(legend.position="bottom")

    # The plot object is handed to ggsave() explicitly rather than relying on
    # last_plot().
    ggsave(
        str_interp("output/native_movers_quartiles_${from_ntile}_to_all.png"),
        p_native, width=5, height=5)
}